chunk options
CSS for scrollable output & Header colors
Turning scientific / Exponential numbers off
options(scipen = 999)
David Robinson tidytuesday: https://www.youtube.com/watch?v=2RadZrpzTaA
library(tidyverse)
library(ggthemes)
library(tidytuesdayR)
library(tidytext)
library(scales)
library(lubridate)
library(glue)
library(ggtext)
library(widyr)
library(countrycode)
library(factoextra)
library(ggstream)
library(wesanderson) # color palette
#' Bright, white-background variant of `ggthemes::theme_fivethirtyeight()`.
#'
#' Takes no arguments. Starts from the FiveThirtyEight theme and swaps in the
#' elements listed below with `%+replace%`, which replaces each named element
#' as a whole.
#'
#' @return The resulting theme object, e.g. for `theme_set()`.
theme_viny_bright <- function() {
  # ggthemes is referenced through its namespace, so the function does not
  # attach the package itself (attaching from inside a function is a global
  # side effect).
  ggthemes::theme_fivethirtyeight() %+replace%
    theme(
      axis.title = element_text(size = 10, face = "bold"),
      axis.text = element_text(size = 9),
      legend.text = element_text(size = 9),
      panel.background = element_rect(fill = "white"),
      plot.background = element_rect(fill = "white"),
      strip.background = element_blank(),
      legend.background = element_rect(fill = NA),
      legend.key = element_rect(fill = NA),
      legend.position = "right",
      legend.direction = "vertical",
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold",
                                family = "serif", margin = margin(0, 0, 15, 0)),
      plot.subtitle = element_text(hjust = 0.5, face = "plain",
                                   family = "serif", size = 9),
      plot.caption = element_text(hjust = 1, size = 8)
    )
}
theme_set(theme_viny_bright())tt <- tt_load("2021-03-23")
Downloading file 1 of 3: `unvotes.csv`
Downloading file 2 of 3: `roll_calls.csv`
Downloading file 3 of 3: `issues.csv`
ttunvotes <- tt$unvotes
head(unvotes)unvotes %>%
mutate_all(as.factor) %>%
summary() rcid country country_code vote
5181 : 193 Canada : 6176 CA : 6176 abstain:110893
5249 : 193 Denmark : 6170 DK : 6170 no : 65500
5313 : 193 Netherlands: 6170 NL : 6170 yes :693544
5394 : 193 Australia : 6166 AU : 6166
5549 : 193 Norway : 6162 NO : 6162
4979 : 192 Belgium : 6161 (Other):831195
(Other):868780 (Other) :832932 NA's : 7898
unvotes %>%
mutate_all(as.factor) %>%
group_by(country, vote) %>%
summarise(count = n()) %>%
mutate(pct = count/sum(count)) %>%
ungroup() %>%
filter(country == "India")unvotes %>%
mutate_all(as.factor) %>%
group_by(country, vote) %>%
summarise(count = n(), .groups = "drop_last") %>%
mutate(pct = count/sum(count)) %>%
group_by(vote) %>%
slice_max(pct, n = 10) %>%
mutate(country = str_wrap(country, width = 20)) %>%
ggplot(aes(x = pct, y = tidytext::reorder_within(country, by = pct,
within = vote))) +
geom_col() +
facet_wrap(~vote, scales = "free_y") +
scale_y_reordered() +
scale_x_continuous(labels = percent) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "top 10 countries by vote type",
y = "", x = "percentage of vote type",
caption = "created by ViSa")unvotes %>%
mutate_all(as.factor) %>%
group_by(country, vote) %>%
summarise(count = n(), .groups = "drop_last") %>%
mutate(pct = count/sum(count)) %>%
group_by(vote) %>%
arrange(desc(pct)) %>%
slice(c(1:10, (n() - 10):n() )) %>% # this selects both top & bottom 10
mutate(country = str_wrap(country, width = 20)) %>%
ggplot(aes(x = pct, y = tidytext::reorder_within(country, by = pct,
within = vote))) +
geom_point() +
facet_wrap(~vote, scales = "free_y") +
scale_y_reordered() +
scale_x_continuous(labels = percent) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "top 10 & bottom 10 countries by each vote type",
y = "", x = "percentage of vote type",
caption = "created by ViSa")unvotes %>%
mutate_all(as.factor) %>%
group_by(country) %>%
mutate(total_votes = n()) %>%
group_by(country, vote) %>%
summarise(count = n(), .groups = "drop_last",
total_votes = first(total_votes)) %>%
mutate(pct = count/sum(count)) %>%
group_by(vote) %>%
arrange(desc(pct)) %>%
slice(c(1:10, (n() - 10):n() )) %>% # this selects both top & bottom 10
mutate(country = str_wrap(country, width = 20)) %>%
ggplot(aes(x = pct,
y = tidytext::reorder_within(country, by = pct, within = vote))) +
geom_point(aes(size = total_votes)) +
facet_wrap(~vote, scales = "free_y") +
scale_y_reordered() +
scale_x_continuous(labels = percent) +
theme(axis.text.x = element_text(angle = 90), legend.position = "none") +
labs(title = "top 10 & bottom 10 countries by each vote type",
y = "", x = "percentage of vote type",
caption = "created by ViSa")unvotes <- unvotes %>%
mutate(vote_number = match(vote, c("no","abstain","yes")) -2)
unvotes
Saving object for flexdashboard
# Per-country share of each vote type, kept as an object for the flexdashboard.
# Result: one row per (country, vote) with count, total_votes and pct, grouped
# by vote and sorted by descending pct.
flex_topbottom <- unvotes %>%
  mutate(across(everything(), as.factor)) %>%
  group_by(country) %>%
  mutate(total_votes = n()) %>%
  group_by(country, vote) %>%
  summarise(count = n(),
            total_votes = first(total_votes),
            .groups = "drop_last") %>%
  # Still grouped by country here, so sum(count) is that country's total.
  mutate(pct = count / sum(count)) %>%
  # Regroup by vote before wrapping the labels: a grouped mutate() may not
  # modify its own grouping column, and country is the grouping column above.
  group_by(vote) %>%
  mutate(country = str_wrap(country, width = 20)) %>%
  arrange(desc(pct))
# Dot plot of the 10 highest and 10 lowest country shares for each vote type.
flex_topbottom %>%
  # flex_topbottom is grouped by vote and sorted by descending pct, so the
  # first/last rows of each group are its top/bottom countries.
  # head()/tail() give exactly 10 rows each (the former `(n() - 10):n()` gave
  # 11), and unique() avoids repeated rows when a group has fewer than 20.
  slice(unique(c(head(seq_len(n()), 10), tail(seq_len(n()), 10)))) %>%
  ggplot(aes(x = pct,
             y = tidytext::reorder_within(country, by = pct, within = vote))) +
  geom_point(aes(size = total_votes)) +
  facet_wrap(~vote, scales = "free_y") +
  scale_y_reordered() +
  scale_x_continuous(labels = percent) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none") +
  labs(title = "top 10 & bottom 10 countries by each vote type",
       y = "", x = "percentage of vote type",
       caption = "created by ViSa")
unvotes %>%
count(vote, vote_number)by_country <- unvotes %>%
group_by(country) %>%
summarise(n_votes = n(),
n_yes = sum(vote == "yes"),
pct_yes = n_yes / n_votes) %>%
arrange(desc(pct_yes)) %>%
filter(n_votes >= 100)
by_country

#' Summarise the yes-vote share for each group of `df`.
#'
#' @param df A (typically grouped) data frame with a `vote` column.
#' @param min_votes Groups with fewer than this many rows are dropped.
#' @return An ungrouped data frame with `n_votes`, `n_yes` and `pct_yes` per
#'   group, sorted by descending `pct_yes`.
summarize_votes <- function(df, min_votes = 10) {
  df %>%
    summarise(n_votes = n(),
              n_yes = sum(vote == "yes"),
              pct_yes = n_yes / n_votes,
              # Drop all grouping so callers that group by several columns
              # (e.g. year and country) do not get a still-grouped result whose
              # later mutate()/fct_reorder() would run once per group.
              .groups = "drop") %>%
    arrange(desc(pct_yes)) %>%
    filter(n_votes >= min_votes)
}
by_country <- unvotes %>%
group_by(country) %>%
summarize_votes()
by_country

# Dot plot of the 10 countries with the highest and the 10 with the lowest
# share of yes votes; point size shows how many votes each country cast.
by_country %>%
  mutate(country = fct_reorder(country, pct_yes)) %>%
  # summarize_votes() returns rows sorted by descending pct_yes, so the first
  # and last rows are the extremes. head()/tail() take 10 from each end; the
  # former `(n() - 10):n()` took 11 from the bottom.
  slice(unique(c(head(seq_len(n()), 10), tail(seq_len(n()), 10)))) %>%
  ggplot(aes(x = pct_yes, y = country)) +
  geom_point(aes(size = n_votes)) +
  scale_x_continuous(labels = percent) +
  labs(title = "Top & bottom countries with % of yes votes in UN",
       caption = "created by ViSa")
by_country %>%
mutate(country = fct_reorder(country, pct_yes)) %>%
slice_min(pct_yes, n = 20) %>%
ggplot(aes(x = pct_yes, y = country)) +
geom_point(aes(size = n_votes)) +
scale_x_continuous(labels = percent) +
labs(title = "Countries with least % of yes votes in UN",
y = "",
caption = "created by ViSa")tt$roll_callsunvotes <- unvotes %>%
left_join(tt$roll_calls %>%
select(rcid, date, amend), by = "rcid")
unvotesby_year <- unvotes %>%
group_by(year = year(date)) %>%
summarize_votes()
by_yearby_year %>%
ggplot(aes(x = year, y = pct_yes)) +
geom_line( size = .9) +
scale_y_continuous(labels = percent) +
expand_limits(y = 0)by_country_year <- unvotes %>%
group_by(year = year(date),
country) %>%
summarize_votes()
by_country_yearby_country_year %>%
filter(country %in% c("United States", "Canada", "Mali")) %>%
mutate(country = fct_reorder(country, pct_yes)) %>%
ggplot(aes(x = year, y = pct_yes, col = country)) +
geom_line(size = 0.9) +
scale_y_continuous(labels = percent) +
scale_color_discrete(guide = guide_legend(reverse = TRUE)) +
expand_limits(y = 0) +
labs(title = "% yes by countries over the years",
caption = "created by ViSa")country_yes_line <- function(filter_list = c("India")){
by_country_year %>%
filter(country %in% filter_list) %>%
mutate(country = fct_reorder(country, pct_yes)) %>%
ggplot(aes(x = year, y = pct_yes, col = country)) +
geom_line(size = 0.9) +
scale_y_continuous(labels = percent) +
# scale_color_discrete(guide = guide_legend(reverse = TRUE)) +
expand_limits(y = 0)+
labs(title = "% yes votes by countries over the years",
caption = "created by ViSa")
}country_yes_line(filter_list = c("India", "Russia", "United States", "China"))country_yes_line_facet <- function(filter_list = c("India")){
by_country_year %>%
filter(country %in% filter_list) %>%
mutate(country = fct_reorder(country, pct_yes)) %>%
ggplot(aes(x = year, y = pct_yes, col = country)) +
geom_line(size = 0.9, show.legend = FALSE) +
scale_y_continuous(labels = percent) +
# scale_color_discrete(guide = guide_legend(reverse = TRUE)) +
expand_limits(y = 0)+
facet_wrap(~country) +
labs(title = "% yes by countries over the years",
caption = "created by ViSa")
}country_yes_line_facet(c("India","Russia","China","France","Pakistan","Sweden"))country_yes_line_facet(c("India","Russia","France","Germany","United Kingdom","Turkey"))
From the charts above, many countries are now close to a 75% yes-vote share, and this has been the case since the 1990s. Some European and Western countries, mostly NATO allies, fall below 75%.
by_country_year <- unvotes %>%
bind_rows(unvotes %>% mutate(country = "Overall")) %>%
group_by(year = year(date),
country) %>%
summarize_votes()
by_country_year %>%
filter(country == "Overall")country_yes_line <- function(filter_list = c("India")){
by_country_year %>%
filter(country %in% filter_list) %>%
mutate(country = fct_reorder(country, pct_yes)) %>%
ggplot(aes(x = year, y = pct_yes)) +
geom_line(data = by_year, lty = 1, size = 1.8, alpha = 0.3) +
geom_line(aes(col = country), size = 0.9) +
scale_y_continuous(labels = percent) +
# scale_color_discrete(guide = guide_legend(reverse = TRUE)) +
expand_limits(y = 0)+
labs(title = "% yes by countries over the years with world average",
caption = "created by ViSa") +
theme(legend.position = "top",
legend.direction = "horizontal")
}country_yes_line(filter_list = c("India", "Russia", "United States", "China"))country_list <- unvotes %>%
distinct(country) %>%
pull()
country_list [1] "United States" "Canada"
[3] "Cuba" "Haiti"
[5] "Dominican Republic" "Mexico"
[7] "Guatemala" "Honduras"
[9] "El Salvador" "Nicaragua"
[11] "Costa Rica" "Panama"
[13] "Colombia" "Venezuela"
[15] "Ecuador" "Peru"
[17] "Brazil" "Bolivia"
[19] "Paraguay" "Chile"
[21] "Argentina" "Uruguay"
[23] "United Kingdom" "Netherlands"
[25] "Belgium" "Luxembourg"
[27] "France" "Poland"
[29] "Czechoslovakia" "Yugoslavia"
[31] "Greece" "Russia"
[33] "Ukraine" "Belarus"
[35] "Norway" "Denmark"
[37] "Liberia" "Ethiopia"
[39] "South Africa" "Iran"
[41] "Turkey" "Iraq"
[43] "Egypt" "Syria"
[45] "Lebanon" "Saudi Arabia"
[47] "Taiwan" "India"
[49] "Philippines" "Australia"
[51] "New Zealand" "Sweden"
[53] "Iceland" "Afghanistan"
[55] "Thailand" "Yemen Arab Republic"
[57] "Pakistan" "Myanmar (Burma)"
[59] "Israel" "Indonesia"
[61] "Hungary" "Jordan"
[63] "Sri Lanka" "Spain"
[65] "Romania" "Ireland"
[67] "Portugal" "Austria"
[69] "Italy" "Albania"
[71] "Bulgaria" "Finland"
[73] "Libya" "Nepal"
[75] "Cambodia" "Laos"
[77] "Sudan" "Morocco"
[79] "Tunisia" "Japan"
[81] "Ghana" "Malaysia"
[83] "Guinea" "Cyprus"
[85] "Mali" "Senegal"
[87] "Benin" "Niger"
[89] "Côte d’Ivoire" "Burkina Faso"
[91] "Togo" "Cameroon"
[93] "Gabon" "Central African Republic"
[95] "Chad" "Congo - Brazzaville"
[97] "Madagascar" "Somalia"
[99] "Nigeria" "Congo - Kinshasa"
[101] "Sierra Leone" "Mongolia"
[103] "Mauritania" "Tanzania"
[105] "Jamaica" "Burundi"
[107] "Rwanda" "Trinidad & Tobago"
[109] "Algeria" "Uganda"
[111] "Kuwait" "Kenya"
[113] "Zanzibar" "Malta"
[115] "Zambia" "Malawi"
[117] "Maldives" "Singapore"
[119] "Gambia" "Guyana"
[121] "Lesotho" "Botswana"
[123] "Barbados" "Yemen People's Republic"
[125] "Mauritius" "Equatorial Guinea"
[127] "Eswatini" "Fiji"
[129] "Bhutan" "Bahrain"
[131] "Qatar" "Oman"
[133] "China" "United Arab Emirates"
[135] "Federal Republic of Germany" "German Democratic Republic"
[137] "Bahamas" "Bangladesh"
[139] "Grenada" "Guinea-Bissau"
[141] "Cape Verde" "São Tomé & Príncipe"
[143] "Mozambique" "Comoros"
[145] "Papua New Guinea" "Suriname"
[147] "Angola" "Djibouti"
[149] "Vietnam" "Samoa"
[151] "Seychelles" "Solomon Islands"
[153] "St. Lucia" "Zimbabwe"
[155] "St. Vincent & Grenadines" "Vanuatu"
[157] "Belize" "Antigua & Barbuda"
[159] "Dominica" "St. Kitts & Nevis"
[161] "Brunei" "Liechtenstein"
[163] "Namibia" "Germany"
[165] "Estonia" "Latvia"
[167] "Lithuania" "Yemen"
[169] "North Korea" "South Korea"
[171] "Micronesia (Federated States of)" "Marshall Islands"
[173] "San Marino" "Bosnia & Herzegovina"
[175] "Armenia" "Azerbaijan"
[177] "Croatia" "Slovenia"
[179] "Moldova" "Turkmenistan"
[181] "Kyrgyzstan" "Kazakhstan"
[183] "Tajikistan" "Monaco"
[185] "Andorra" "Czechia"
[187] "Slovakia" "North Macedonia"
[189] "Eritrea" "Georgia"
[191] "Uzbekistan" "Palau"
[193] "Tonga" "Nauru"
[195] "Kiribati" "Tuvalu"
[197] "Switzerland" "Timor-Leste"
[199] "Montenegro" "South Sudan"
map_data("world") %>%
filter(region != "Antarctica") %>%
ggplot(aes(x = long, y = lat, group = group)) +
geom_polygon() +
theme_map()

#' Yes-vote summary per group, with all grouping dropped from the result.
#'
#' @param df A (typically grouped) data frame with a `vote` column.
#' @param min_votes Minimum number of rows a group needs to be kept.
#' @return One row per group with `n_votes`, `n_yes` and `pct_yes`, sorted by
#'   descending `pct_yes`.
summarize_votes <- function(df, min_votes = 10) {
  vote_summary <- summarise(
    df,
    n_votes = n(),
    n_yes = sum(vote == "yes"),
    pct_yes = n_yes / n_votes,
    .groups = "drop"
  )
  ranked <- arrange(vote_summary, desc(pct_yes))
  filter(ranked, n_votes >= min_votes)
}
by_country <- unvotes %>%
group_by(country, country_code) %>%
summarize_votes()
by_countryby_country_year <- unvotes %>%
group_by(year = year(date),
country,
country_code) %>%
summarize_votes()
by_country_yearlibrary(fuzzyjoin)world_data <- map_data("world") %>%
filter(region != "Antarctica") %>%
as_tibble() %>%
fuzzyjoin::regex_left_join(maps::iso3166 %>%
select(mapname, country_code = a2),
c(region = "mapname"))
world_dataworld_data %>%
left_join(by_country, by = "country_code") %>%
ggplot(aes(x = long, y = lat, group = group, fill = pct_yes)) +
geom_polygon() +
theme_map() +
scale_fill_gradient2(low = "red", high = "blue",
midpoint = .6, labels = percent) +
labs(fill = "% yes vote",
caption = "created by ViSa")library(countrycode)unvotes %>%
mutate(continent = countrycode(country_code, "iso2c", "continent")) %>%
group_by(continent, year = year(date)) %>%
summarize_votes() %>%
filter(!is.na(continent)) %>%
mutate(country = fct_reorder(continent, pct_yes)) %>%
ggplot(aes(x = year, y = pct_yes)) +
geom_line(data = by_year, lty = 1, size = 1.8, alpha = 0.3) +
geom_line(aes(col = continent), size = 0.9) +
scale_y_continuous(labels = percent) +
scale_color_discrete(guide = guide_legend(reverse = TRUE)) +
expand_limits(y = 0)+
labs(title = "% yes by continents over the years with world average",
caption = "created by ViSa")library(WDI)country_incomes <- WDI(indicator = c(gdp_per_capita = "NY.GDP.PCAP.PP.KD",
pop = "SP.POP.TOTL"),
start = 2019, end = 2019, extra = TRUE) %>%
as_tibble() %>%
select(country_code = iso2c, income, gdp_per_capita, pop) %>%
filter(!is.na(income)) %>%
mutate(income = fct_relevel(income, "Low income", "Lower middle income", "Upper middle income"))
country_incomes

#' Line chart of yearly yes-vote share, one coloured line per level of a
#' grouping column.
#'
#' @param tbl Data frame with `year`, `pct_yes` and the grouping column.
#' @param category Unquoted name of the grouping column in `tbl`.
#' @return A ggplot object.
plot_by <- function(tbl, category) {
  tbl %>%
    filter(!is.na({{ category }})) %>%
    # Overwrite the grouping column itself with the reordered factor. The
    # previous code stored it in a new column literally named `category`,
    # which the colour aesthetic below never read, so the reordering was lost.
    mutate("{{ category }}" := fct_reorder({{ category }}, pct_yes)) %>%
    ggplot(aes(x = year, y = pct_yes)) +
    # geom_line(data = by_year, lty = 1, size = 1.8, alpha = 0.3) +
    geom_line(aes(col = {{ category }}), size = 0.9) +
    scale_y_continuous(labels = percent) +
    scale_color_discrete(guide = guide_legend(reverse = TRUE)) +
    expand_limits(y = 0) +
    # NOTE(review): the title mentions countries and a world average, but the
    # world-average layer above is commented out — confirm the intended title.
    labs(title = "% yes by countries over the years with world average",
         caption = "created by ViSa")
}
unvotes %>%
inner_join(country_incomes, by = "country_code") %>%
group_by(income,
year = year(date)) %>%
summarize_votes() %>%
plot_by(income)unvotes %>%
filter(country %in% c("India","Russia")) %>%
select(rcid, country, vote_number) %>%
spread(country, vote_number, fill = 0)unvotes %>%
filter(country %in% c("India","Canada")) %>%
select(rcid, country, vote_number) %>%
spread(country, vote_number, fill = 0) %>%
summarize(correlation = cor(.[[2]],.[[3]]))corr_fn <- function(countries_sel = c("India", "United States")){
unvotes %>%
filter(country %in% countries_sel) %>%
select(rcid, country, vote_number) %>%
spread(country, vote_number, fill = 0) %>%
summarize(correlation = cor(.[[2]],.[[3]]))
}corr_fn(c("India","Pakistan"))library(widyr)unvotes %>%
pairwise_cor(item = country, feature = rcid, value = vote_number, sort = TRUE)library(tidygraph)
library(ggraph)tidygraph & ggraph from: https://youtu.be/mApnx5NJwQA?t=862
unvotes %>%
pairwise_cor(item = country, feature = rcid,
value = vote_number, sort = TRUE) %>%
filter(item1 == "India")corr_tbl <- unvotes %>%
pairwise_cor(item = country, feature = rcid,
value = vote_number, sort = TRUE) %>%
# filter(item1 == "India") %>%
left_join(y = (unvotes %>%
mutate(continent = countrycode(country_code, "iso2c", "continent")) %>%
select(country, continent) %>%
distinct()),
by = c("item2" = "country")) %>%
rename(continent_item2 = continent)
corr_tblcorr_tbl %>%
filter(item1 == "India",
!is.na(continent_item2)) %>%
arrange(desc(correlation)) %>%
slice(c(1:15, (n()- 0:15) )) %>%
mutate(item2 = str_wrap(item2, width = 25)) %>%
ggplot(aes(x = correlation,
y = reorder_within(item2, by = correlation, within = continent_item2),
col = continent_item2)) +
geom_point(show.legend = FALSE) +
geom_vline(xintercept = 0, lty = 2, size = 0.9, col = "tomato") +
facet_wrap(~continent_item2, scales = "free_y") +
scale_y_reordered() +
labs(title = "Top & bottom correlated countries in voting in UN to India",
y = "",
caption = "created by ViSa") +
theme(panel.grid.major.x = element_blank())

#' Plot the countries whose UN votes correlate most and least with one country.
#'
#' Reads the global `corr_tbl` (needs columns `item1`, `item2`, `correlation`
#' and `continent_item2`) and facets the result by continent.
#'
#' @param country_selected Country name matched against `corr_tbl$item1`.
#' @param top_n_bottom Number of rows taken from each end of the ranking.
#' @return A ggplot object.
top_corr_countries_with <- function(country_selected = "India", top_n_bottom = 15) {
  corr_tbl %>%
    filter(item1 == country_selected,
           !is.na(continent_item2)) %>%
    arrange(desc(correlation)) %>%
    # Take exactly top_n_bottom rows from each end. The former
    # `n() - 0:top_n_bottom` took one extra bottom row and produced zero or
    # negative indices when fewer rows than requested were available.
    slice(unique(c(head(seq_len(n()), top_n_bottom),
                   tail(seq_len(n()), top_n_bottom)))) %>%
    mutate(item2 = str_wrap(item2, width = 25)) %>%
    ggplot(aes(x = correlation,
               y = reorder_within(item2, by = correlation, within = continent_item2),
               col = correlation > 0)) +
    geom_point() +
    # Zero-height error bars draw a segment from each point back to x = 0.
    geom_errorbarh(height = 0, aes(xmin = correlation, xmax = 0)) +
    geom_vline(xintercept = 0, lty = 2, size = 0.9, col = "midnightblue") +
    facet_wrap(~continent_item2, scales = "free_y") +
    scale_y_reordered() +
    expand_limits(x = 0.8) +
    labs(title = glue("Top & bottom correlated countries in voting in UN with <i>{country_selected}</i>"),
         y = "",
         caption = "created by ViSa") +
    theme(panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_blank(),
          # element_markdown() renders the <i> tag in the title.
          plot.title = ggtext::element_markdown(),
          legend.position = "none")
}
top_corr_countries_with(top_n_bottom = 20)
top_corr_countries_with("France", top_n_bottom = 20)
top_corr_countries_with("Pakistan", top_n_bottom = 20)
top_corr_countries_with("Russia", top_n_bottom = 20)
top_corr_countries_with("China", top_n_bottom = 20)
top_corr_countries_with("Australia", top_n_bottom = 20)
top_corr_countries_with("United Arab Emirates", top_n_bottom = 20)
top_corr_countries_with("Saudi Arabia", top_n_bottom = 20)
top_corr_countries_with("Israel", top_n_bottom = 20)
top_corr_countries_with("Turkey", top_n_bottom = 20)
top_corr_countries_with("United States", top_n_bottom = 20)
corr_tbl %>%
group_by(country = item1) %>%
filter(!is.na(correlation)) %>%
mutate_if(is.character, as_factor) %>% #summary()
summarise(median_corr = median(correlation),
mean_corr = mean(correlation)) %>%
arrange(desc(median_corr))corr_tbl <- corr_tbl %>%
select(-continent_item2) %>%
mutate(continent1 = countrycode(item1, "country.name", "continent"),
continent2 = countrycode(item2, "country.name", "continent"))
corr_tblcorr_tbl %>%
na.omit() %>%
group_by(continent1, continent2) %>%
summarise(avg_corr = mean(correlation)) %>%
arrange(desc(avg_corr))intercontinent_corr <- corr_tbl %>%
na.omit() %>%
filter(continent1 == continent2) %>%
group_by(item1) %>%
summarise(avg_intercontinent_correlation = mean(correlation)) %>%
arrange(desc(avg_intercontinent_correlation))
intercontinent_corr
Taiwan & Israel have the lowest correlation, which is negative.
library(plotly)ggplotly(world_data %>%
left_join((intercontinent_corr %>%
mutate(country_code = countrycode(item1, "country.name", "iso2c"))
),
by = c("country_code")) %>%
ggplot(aes(x = long, y = lat, group = group,
fill = avg_intercontinent_correlation, label = item1)) +
geom_polygon() +
theme_map() +
scale_fill_gradient2(low = "red", high = "blue",
midpoint = -0.1) +
labs(fill = "avg intercontinent correlation",
caption = "created by ViSa"))
Both Taiwan & Israel are hard to see on the map.
tt$issues %>%
count(issue)library(tidytext)tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short) %>%
anti_join(stop_words, by = "word") %>%
count(word, sort = TRUE)rc_words <- tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short) %>%
anti_join(stop_words, by = "word") %>%
select(rcid, word)
rc_wordsunvotes %>%
inner_join(rc_words, by = "rcid")unvotes %>%
inner_join(rc_words, by = "rcid") %>%
filter(country == "United States") %>%
group_by(word) %>%
summarize_votes(min_votes = 100) %>%
mutate(word = fct_reorder(word, pct_yes)) %>%
ggplot(aes(pct_yes, word)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) words_yes_votes <- function(country_selected = "India", min_vote_limit = 100){
unvotes %>%
inner_join(rc_words, by = "rcid") %>%
filter(country == country_selected) %>%
group_by(word) %>%
summarize_votes(min_votes = min_vote_limit) %>%
mutate(word = fct_reorder(word, pct_yes)) %>%
ggplot(aes(pct_yes, word)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) +
theme(plot.title = element_markdown()) +
labs(title = glue("Most common words for yes votes by <i>{country_selected}</i>"))
}words_yes_votes() words_yes_votes("Pakistan") words_yes_votes("Israel") words_yes_votes_multi <- function(country_selected = c("India","Russia"), min_vote_limit = 100){
unvotes %>%
inner_join(rc_words, by = "rcid") %>%
filter(country %in% country_selected) %>%
group_by(word, country) %>%
summarize_votes(min_votes = min_vote_limit) %>%
mutate(word = fct_reorder(word, pct_yes)) %>%
ggplot(aes(pct_yes, word, col = country)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) +
theme(plot.title = element_markdown()) #+
# labs(title = glue("Most common words for yes votes by <i>{country_selected}</i>"))
}words_yes_votes_multi()words_yes_votes_multi(c("India","Pakistan") )words_yes_votes_multi(c("India","China") )words_yes_votes_multi_diff <- function(country_selected = c("India","Russia"), min_vote_limit = 100){
unvotes %>%
inner_join(rc_words, by = "rcid") %>%
filter(country %in% country_selected) %>%
group_by(word, country) %>%
summarize_votes(min_votes = min_vote_limit) %>%
mutate(word = fct_reorder(word, pct_yes, function(x) max(x) - min(x))) %>%
ggplot(aes(pct_yes, word, col = country)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) +
theme(plot.title = element_markdown()) #+
# labs(title = glue("Most common words for yes votes by <i>{country_selected}</i>"))
}words_yes_votes_multi_diff()tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short, token = "ngrams", n =2) %>%
count(word, sort = TRUE)

# Bigrams from the roll-call short descriptions, one row per (rcid, bigram),
# keeping only bigrams in which neither word is a stop word.
bi_grams <- tt$roll_calls %>%
  filter(!is.na(short)) %>%
  unnest_tokens(word, short, token = "ngrams", n = 2) %>%
  # Drop only rows without a bigram; na.omit() would also discard any row
  # that has an NA in some other column.
  filter(!is.na(word)) %>%
  # stop_words holds single words, so an anti_join on the whole bigram never
  # matches. Split the bigram and test each half instead.
  separate(word, into = c("word1", "word2"), sep = " ", remove = FALSE) %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word) %>%
  select(rcid, word)
bi_grams
unvotes %>%
inner_join(bi_grams, by = "rcid") %>%
filter(country == "India") %>%
group_by(word) %>%
summarize_votes(min_votes = 20) %>%
mutate(word = fct_reorder(word, pct_yes)) %>%
ggplot(aes(pct_yes, word)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent)bigrams_yes_votes_multi <- function(country_selected = c("India","Russia"), min_vote_limit = 20)
{
unvotes %>%
inner_join(bi_grams, by = "rcid") %>%
filter(country %in% country_selected) %>%
group_by(word, country) %>%
summarize_votes(min_votes = min_vote_limit) %>%
mutate(word = fct_reorder(word, pct_yes)) %>%
ggplot(aes(pct_yes, word, col = country)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) #+
# theme(plot.title = element_markdown()) +
# labs(title = glue("Most common bi grams for yes votes by <i>{country_selected}</i>"))
}bigrams_yes_votes_multi()bigrams_yes_votes_multi(c("India","Pakistan"))bigrams_yes_votes_multi(c("India","United States"))bigrams_yes_votes_multi(c("India","France"))bigrams_yes_votes_multi_diff <- function(country_selected = c("India","Russia"), min_vote_limit = 20)
{
unvotes %>%
inner_join(bi_grams, by = "rcid") %>%
filter(country %in% country_selected) %>%
group_by(word, country) %>%
summarize_votes(min_votes = min_vote_limit) %>%
mutate(word = fct_reorder(word, pct_yes, function(x) max(x) - min(x))) %>%
ggplot(aes(pct_yes, word, col = country)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) #+
# theme(plot.title = element_markdown()) +
# labs(title = glue("Most common bi grams for yes votes by <i>{country_selected}</i>"))
}bigrams_yes_votes_multi_diff()bigrams_yes_votes_multi_diff(c("India","Pakistan"))tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short, token = "ngrams", n =4) %>%
na.omit() %>%
count(word, sort = TRUE)quad_grams <- tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short, token = "ngrams", n =4) %>%
na.omit() %>%
anti_join(stop_words, by = "word") %>%
select(rcid, word)
quad_gramsquadgrams_yes_votes_multi <- function(country_selected = c("India","Russia"), min_vote_limit = 5)
{
unvotes %>%
inner_join(quad_grams, by = "rcid") %>%
filter(country %in% country_selected) %>%
group_by(word, country) %>%
summarize_votes(min_votes = min_vote_limit) %>%
mutate(word = fct_reorder(word, pct_yes)) %>%
ggplot(aes(pct_yes, word, col = country)) +
geom_point(aes(size = n_votes))+
scale_x_continuous(labels = percent) #+
# theme(plot.title = element_markdown()) +
# labs(title = glue("Most common bi grams for yes votes by <i>{country_selected}</i>"))
}quadgrams_yes_votes_multi()quadgrams_yes_votes_multi(c("India","Canada"))library(stringr)tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short, token = "ngrams", n =2) %>%
na.omit() %>%
anti_join(stop_words, by = "word") %>%
select(rcid, word) %>%
filter(str_detect(word, regex("Kashmir", ignore_case = TRUE)))tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short, token = "ngrams", n =2) %>%
na.omit() %>%
anti_join(stop_words, by = "word") %>%
select(rcid, word, descr) %>%
filter(str_detect(descr, regex("Kashmir", ignore_case = TRUE)))rc_words <- tt$roll_calls %>%
filter(!is.na(short)) %>%
unnest_tokens(word, short) %>%
anti_join(stop_words, by = "word") %>%
distinct(rcid, word) %>%
add_count(word, name = "word_count", sort = TRUE) %>%
filter(word_count >= 100) %>%
select(rcid, word)
rc_wordsby_country_word <- unvotes %>%
inner_join(rc_words, by = "rcid") %>%
group_by(word, country) %>%
summarize_votes(min_votes = 0)
by_country_wordby_country_word %>%
widely_svd( word, country, pct_yes)by_country_word %>%
widely_svd(word, country, pct_yes) %>%
filter(dimension == 1) %>%
mutate(word = reorder_within(word, by = value, within = dimension)) %>%
top_n(30, abs(value)) %>%
ggplot(aes(x = value, y = word )) +
geom_col() +
# facet_wrap(~dimension, scale = "free_y") +
scale_y_reordered()by_country_word %>%
widely_svd(word, country, pct_yes) %>%
filter(dimension %in% c(1:6)) %>%
mutate(word = reorder_within(word, by = value, within = dimension)) %>%
top_n(90, abs(value)) %>%
ggplot(aes(x = value, y = word, fill = value > 0)) +
geom_col() +
facet_wrap(~dimension, scale = "free_y") +
scale_y_reordered() +
theme(axis.text.x = element_text(angle = 90),
legend.position = "none")
Restricting the number of principal components to 5
by_country_word %>%
widely_svd(word, country, pct_yes, nv = 5) %>%
# filter(dimension %in% c(1:6)) %>%
mutate(word = reorder_within(word, by = value, within = dimension)) %>%
top_n(120, abs(value)) %>%
ggplot(aes(x = value, y = word, fill = value > 0)) +
geom_col() +
facet_wrap(~dimension, scale = "free_y") +
scale_y_reordered() +
theme(axis.text.x = element_text(angle = 90),
legend.position = "none")by_country_word %>%
widely_svd(country, word, pct_yes) %>%
filter(dimension %in% c(3)) %>%
mutate(country = reorder_within(country, by = value, within = dimension)) %>%
top_n(25, abs(value)) %>%
ggplot(aes(x = value, y = country, fill = value > 0)) +
geom_col() +
# facet_wrap(~dimension, scale = "free_y") +
scale_y_reordered() +
theme(axis.text.x = element_text(angle = 90),
legend.position = "none")from: https://youtu.be/mApnx5NJwQA?t=982
Clustering functions are not available in the CRAN package.
https://github.com/dgrtwo/widyr
# library(devtools)
# install_github("dgrtwo/widyr")library(widyr)
widely_kmeans is not available in any version of the widyr package.
library(factoextra)svd_dimen15 <- by_country_word %>%
widely_svd(country, word, pct_yes, nv = 15)svd_dimen15sapply(svd_dimen15, function(x) sum(is.na(x))) country dimension value
0 0 0
svd_dimen15_wide <- pivot_wider(data = svd_dimen15, id_cols = country, names_from = dimension, values_from = value)
svd_dimen15_widesvd_dimen15_wide <- as.data.frame(svd_dimen15_wide)rownames(svd_dimen15_wide) <- svd_dimen15_wide$countrysvd_dimen15_wide <- svd_dimen15_wide[,2:16]
svd_dimen15_widefactoextra::fviz_nbclust(scale(svd_dimen15_wide), kmeans, method = "wss")factoextra::fviz_nbclust(scale(svd_dimen15_wide), kmeans, method = "silhouette")factoextra::fviz_nbclust(scale(svd_dimen15_wide), kmeans, method = "gap_stat")Clustering k = 1,2,..., K.max (= 10): .. done
Bootstrapping, b = 1,2,..., B (= 100) [one "." per sample]:
.................................................. 50
.................................................. 100
hier5 <- hcut(svd_dimen15_wide, k = 5, stand = TRUE)
hier5
Call:
stats::hclust(d = x, method = hc_method)
Cluster method : ward.D2
Distance : euclidean
Number of objects: 200
fviz_dend(hier5, rect = TRUE, cex = 0.5,
k_colors = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07", "#ff5733")
)set.seed(123)
km.res5 <- kmeans(svd_dimen15_wide, 5, nstart = 25)
km.res5K-means clustering with 5 clusters of sizes 24, 5, 40, 27, 104
Cluster means:
1 2 3 4 5 6
1 -0.05883850 -0.15975310 0.02716216 -0.04221911 0.015228648 -0.049397895
2 -0.06787841 0.04167016 -0.02902809 -0.09510712 -0.073549499 -0.116434435
3 -0.06741513 0.02375440 0.06525529 0.07434288 -0.039768751 0.001051916
4 -0.04911880 -0.07182883 -0.01853074 0.05268736 -0.007599480 0.103131394
5 -0.07728224 0.02616081 -0.02244492 -0.02061601 0.004283506 -0.004755643
7 8 9 10 11 12
1 -0.0312774356 0.004835413 0.008346752 0.001524352 0.031526905 0.029606931
2 0.2250779070 0.298656113 -0.026532299 0.021728232 -0.002297519 0.028178317
3 0.0007964694 -0.003538696 0.012439233 -0.003694175 -0.015947688 0.032851438
4 0.0493076622 0.025998705 0.010950068 -0.019437228 0.004769787 -0.050350647
5 -0.0114965068 -0.017433816 -0.009033282 0.001226936 -0.004459528 -0.008699865
13 14 15
1 -0.03529428 0.006831908 0.010852234
2 -0.01010099 -0.031485246 0.061317479
3 -0.04221760 0.006435989 0.009337899
4 0.03235324 -0.023208761 -0.015128591
5 0.01960858 0.001030353 -0.004318087
Clustering vector:
Angola Armenia
5 5
Azerbaijan Bahrain
5 5
Bangladesh Bosnia & Herzegovina
5 5
Brunei Cape Verde
5 5
Comoros Djibouti
5 5
Guinea-Bissau Namibia
5 5
North Korea Qatar
5 5
São Tomé & Príncipe Seychelles
5 5
United Arab Emirates Vanuatu
5 3
Vietnam Yemen
5 5
Algeria Antigua & Barbuda
5 3
China Congo - Brazzaville
5 5
Dominica German Democratic Republic
3 2
Guinea Jordan
5 5
Kuwait Libya
5 5
Malaysia Mali
5 5
Mauritania Monaco
5 1
Mongolia Morocco
5 5
Mozambique Oman
5 5
Senegal Somalia
5 5
St. Kitts & Nevis St. Lucia
3 3
St. Vincent & Grenadines Sudan
3 5
Suriname Tajikistan
5 1
Tanzania Tunisia
5 5
Uganda Yemen People's Republic
5 2
Zambia Zimbabwe
5 5
Kiribati Nauru
3 3
Taiwan Timor-Leste
2 1
Tonga Turkmenistan
3 1
Tuvalu Federal Republic of Germany
1 4
Andorra Belize
1 3
Croatia Czechia
1 1
Eritrea Estonia
1 1
Georgia Germany
1 1
Grenada Kazakhstan
3 1
Kyrgyzstan Latvia
1 1
Liechtenstein Lithuania
4 1
Moldova Montenegro
1 1
North Macedonia Slovakia
1 1
Slovenia South Korea
1 4
Switzerland South Sudan
1 4
Benin Burkina Faso
5 5
Guyana Jamaica
5 3
Maldives Nigeria
5 5
Singapore Sri Lanka
5 5
Yemen Arab Republic Bhutan
2 5
Equatorial Guinea Afghanistan
3 5
Cuba Indonesia
5 5
Iran Lebanon
5 5
Pakistan Saudi Arabia
5 5
Syria Turkey
5 5
Lesotho Malta
4 5
Trinidad & Tobago Czechoslovakia
5 2
San Marino Uzbekistan
1 1
Mauritius Palau
5 3
India Botswana
5 5
Costa Rica El Salvador
3 3
Peru Philippines
5 5
Thailand Burundi
5 5
Gabon Cyprus
5 5
Ethiopia Niger
5 5
Togo Barbados
5 3
Liberia Sierra Leone
3 5
Cambodia Eswatini
5 3
Congo - Kinshasa Gambia
3 5
Bahamas Iraq
3 5
Yugoslavia Papua New Guinea
5 3
Egypt Honduras
5 3
Kenya Colombia
5 5
Mexico Madagascar
5 5
Laos Fiji
5 3
Nepal Cameroon
5 3
Chad Central African Republic
5 3
Uruguay Solomon Islands
3 3
Côte d’Ivoire Ghana
3 5
Haiti Samoa
3 3
Albania Ecuador
5 5
Dominican Republic Rwanda
3 5
Myanmar (Burma) Guatemala
5 3
Panama Bolivia
3 5
South Africa Chile
4 3
Nicaragua Venezuela
5 5
Belarus Finland
5 4
Brazil Ukraine
5 5
Argentina Bulgaria
5 5
Romania Russia
5 5
Paraguay Malawi
3 4
Hungary Marshall Islands
5 3
Austria Poland
4 5
Sweden Ireland
4 4
Iceland Norway
4 4
New Zealand Greece
4 4
Denmark Micronesia (Federated States of)
4 3
Spain Australia
4 4
Japan Canada
4 4
Netherlands Portugal
4 4
Luxembourg Belgium
4 4
Italy United Kingdom
4 4
France Israel
4 3
United States Zanzibar
3 5
Within cluster sum of squares by cluster:
[1] 2.6730265 0.7970714 2.5388916 2.0926619 2.5761772
(between_SS / total_SS = 23.8 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
km.res5$cluster Angola Armenia
5 5
Azerbaijan Bahrain
5 5
Bangladesh Bosnia & Herzegovina
5 5
Brunei Cape Verde
5 5
Comoros Djibouti
5 5
Guinea-Bissau Namibia
5 5
North Korea Qatar
5 5
São Tomé & Príncipe Seychelles
5 5
United Arab Emirates Vanuatu
5 3
Vietnam Yemen
5 5
Algeria Antigua & Barbuda
5 3
China Congo - Brazzaville
5 5
Dominica German Democratic Republic
3 2
Guinea Jordan
5 5
Kuwait Libya
5 5
Malaysia Mali
5 5
Mauritania Monaco
5 1
Mongolia Morocco
5 5
Mozambique Oman
5 5
Senegal Somalia
5 5
St. Kitts & Nevis St. Lucia
3 3
St. Vincent & Grenadines Sudan
3 5
Suriname Tajikistan
5 1
Tanzania Tunisia
5 5
Uganda Yemen People's Republic
5 2
Zambia Zimbabwe
5 5
Kiribati Nauru
3 3
Taiwan Timor-Leste
2 1
Tonga Turkmenistan
3 1
Tuvalu Federal Republic of Germany
1 4
Andorra Belize
1 3
Croatia Czechia
1 1
Eritrea Estonia
1 1
Georgia Germany
1 1
Grenada Kazakhstan
3 1
Kyrgyzstan Latvia
1 1
Liechtenstein Lithuania
4 1
Moldova Montenegro
1 1
North Macedonia Slovakia
1 1
Slovenia South Korea
1 4
Switzerland South Sudan
1 4
Benin Burkina Faso
5 5
Guyana Jamaica
5 3
Maldives Nigeria
5 5
Singapore Sri Lanka
5 5
Yemen Arab Republic Bhutan
2 5
Equatorial Guinea Afghanistan
3 5
Cuba Indonesia
5 5
Iran Lebanon
5 5
Pakistan Saudi Arabia
5 5
Syria Turkey
5 5
Lesotho Malta
4 5
Trinidad & Tobago Czechoslovakia
5 2
San Marino Uzbekistan
1 1
Mauritius Palau
5 3
India Botswana
5 5
Costa Rica El Salvador
3 3
Peru Philippines
5 5
Thailand Burundi
5 5
Gabon Cyprus
5 5
Ethiopia Niger
5 5
Togo Barbados
5 3
Liberia Sierra Leone
3 5
Cambodia Eswatini
5 3
Congo - Kinshasa Gambia
3 5
Bahamas Iraq
3 5
Yugoslavia Papua New Guinea
5 3
Egypt Honduras
5 3
Kenya Colombia
5 5
Mexico Madagascar
5 5
Laos Fiji
5 3
Nepal Cameroon
5 3
Chad Central African Republic
5 3
Uruguay Solomon Islands
3 3
Côte d’Ivoire Ghana
3 5
Haiti Samoa
3 3
Albania Ecuador
5 5
Dominican Republic Rwanda
3 5
Myanmar (Burma) Guatemala
5 3
Panama Bolivia
3 5
South Africa Chile
4 3
Nicaragua Venezuela
5 5
Belarus Finland
5 4
Brazil Ukraine
5 5
Argentina Bulgaria
5 5
Romania Russia
5 5
Paraguay Malawi
3 4
Hungary Marshall Islands
5 3
Austria Poland
4 5
Sweden Ireland
4 4
Iceland Norway
4 4
New Zealand Greece
4 4
Denmark Micronesia (Federated States of)
4 3
Spain Australia
4 4
Japan Canada
4 4
Netherlands Portugal
4 4
Luxembourg Belgium
4 4
Italy United Kingdom
4 4
France Israel
4 3
United States Zanzibar
3 5
table(km.res5$cluster)
1 2 3 4 5
24 5 40 27 104
aggregate(svd_dimen15_wide, by = list(cluster = km.res5$cluster), mean)svd_km5<- cbind(svd_dimen15_wide, cluster = km.res5$cluster)
svd_km5fviz_cluster(km.res5,
data = svd_dimen15_wide,
palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07", "#ff5733"),
main = "cluster plots",
ggtheme = theme_clean()
)set.seed(123)
km.res7 <- kmeans(svd_dimen15_wide, 7, nstart = 25)
km.res7K-means clustering with 7 clusters of sizes 16, 8, 91, 23, 5, 33, 24
Cluster means:
1 2 3 4 5 6
1 -0.06611477 0.0005389728 -0.04972262 -0.103441987 -0.12162064 0.02137175
2 -0.04586867 -0.0324567788 0.25340303 0.033963777 -0.11923419 -0.03912281
3 -0.07904042 0.0291390247 -0.01747947 -0.004146101 0.02619012 -0.01042357
4 -0.05848173 -0.1619430115 0.01837464 -0.038403946 0.01736273 -0.05071695
5 -0.06787841 0.0416701609 -0.02902809 -0.095107121 -0.07354950 -0.11643444
6 -0.07149645 0.0317438409 0.03178120 0.075601864 -0.01886258 0.01335675
7 -0.04793178 -0.0760845876 -0.02853858 0.057834862 -0.01022246 0.11632434
7 8 9 10 11 12
1 -0.003850033 -0.0504648414 0.006000845 -0.051409429 -0.046476020 -0.040421140
2 -0.008280262 -0.0006645271 -0.036134815 -0.020231911 -0.007376369 -0.049480061
3 -0.012469197 -0.0104352864 -0.013043462 0.008272915 0.003808412 -0.004461497
4 -0.028873211 0.0019171313 0.019054570 -0.005675441 0.028345580 0.036817662
5 0.225077907 0.2986561128 -0.026532299 0.021728232 -0.002297519 0.028178317
6 0.007211103 -0.0026569244 0.023706000 -0.005451055 -0.009444211 0.034668749
7 0.046063646 0.0278874804 0.008897868 0.001400191 -0.004187335 -0.032580398
13 14 15
1 0.007466883 -0.011948188 -0.022602237
2 0.076293735 0.037692783 -0.007305326
3 0.022440745 0.003060035 -0.004182515
4 -0.039044094 -0.006644028 0.008558640
5 -0.010100988 -0.031485246 0.061317479
6 -0.069676470 0.007881197 0.016666354
7 0.033436381 -0.024757326 -0.007072273
Clustering vector:
Angola Armenia
3 1
Azerbaijan Bahrain
1 3
Bangladesh Bosnia & Herzegovina
3 1
Brunei Cape Verde
3 3
Comoros Djibouti
3 3
Guinea-Bissau Namibia
3 3
North Korea Qatar
3 3
São Tomé & Príncipe Seychelles
3 1
United Arab Emirates Vanuatu
3 3
Vietnam Yemen
3 3
Algeria Antigua & Barbuda
3 6
China Congo - Brazzaville
1 3
Dominica German Democratic Republic
6 5
Guinea Jordan
3 3
Kuwait Libya
3 3
Malaysia Mali
3 3
Mauritania Monaco
3 4
Mongolia Morocco
1 3
Mozambique Oman
3 3
Senegal Somalia
3 3
St. Kitts & Nevis St. Lucia
6 6
St. Vincent & Grenadines Sudan
6 3
Suriname Tajikistan
3 4
Tanzania Tunisia
3 3
Uganda Yemen People's Republic
3 5
Zambia Zimbabwe
3 3
Kiribati Nauru
2 2
Taiwan Timor-Leste
5 4
Tonga Turkmenistan
2 4
Tuvalu Federal Republic of Germany
2 7
Andorra Belize
4 6
Croatia Czechia
4 4
Eritrea Estonia
4 4
Georgia Germany
4 4
Grenada Kazakhstan
6 4
Kyrgyzstan Latvia
4 4
Liechtenstein Lithuania
7 4
Moldova Montenegro
4 4
North Macedonia Slovakia
4 4
Slovenia South Korea
4 1
Switzerland South Sudan
4 2
Benin Burkina Faso
3 3
Guyana Jamaica
3 6
Maldives Nigeria
3 3
Singapore Sri Lanka
3 3
Yemen Arab Republic Bhutan
5 3
Equatorial Guinea Afghanistan
6 3
Cuba Indonesia
3 3
Iran Lebanon
3 3
Pakistan Saudi Arabia
3 3
Syria Turkey
3 1
Lesotho Malta
3 3
Trinidad & Tobago Czechoslovakia
3 5
San Marino Uzbekistan
4 4
Mauritius Palau
3 2
India Botswana
3 3
Costa Rica El Salvador
6 6
Peru Philippines
3 3
Thailand Burundi
3 3
Gabon Cyprus
3 3
Ethiopia Niger
3 3
Togo Barbados
3 6
Liberia Sierra Leone
6 3
Cambodia Eswatini
3 6
Congo - Kinshasa Gambia
6 3
Bahamas Iraq
6 3
Yugoslavia Papua New Guinea
3 6
Egypt Honduras
3 6
Kenya Colombia
3 3
Mexico Madagascar
3 3
Laos Fiji
3 6
Nepal Cameroon
3 6
Chad Central African Republic
3 6
Uruguay Solomon Islands
6 6
Côte d’Ivoire Ghana
6 3
Haiti Samoa
6 6
Albania Ecuador
1 3
Dominican Republic Rwanda
6 3
Myanmar (Burma) Guatemala
3 6
Panama Bolivia
6 3
South Africa Chile
7 6
Nicaragua Venezuela
3 3
Belarus Finland
1 7
Brazil Ukraine
3 1
Argentina Bulgaria
3 1
Romania Russia
1 1
Paraguay Malawi
6 7
Hungary Marshall Islands
1 2
Austria Poland
7 1
Sweden Ireland
7 7
Iceland Norway
7 7
New Zealand Greece
7 7
Denmark Micronesia (Federated States of)
7 2
Spain Australia
7 7
Japan Canada
7 7
Netherlands Portugal
7 7
Luxembourg Belgium
7 7
Italy United Kingdom
7 7
France Israel
7 6
United States Zanzibar
6 3
Within cluster sum of squares by cluster:
[1] 0.9107063 1.6223505 1.2851942 2.3711807 0.7970714 1.0286972 1.4396068
(between_SS / total_SS = 32.5 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
km.res7$cluster Angola Armenia
3 1
Azerbaijan Bahrain
1 3
Bangladesh Bosnia & Herzegovina
3 1
Brunei Cape Verde
3 3
Comoros Djibouti
3 3
Guinea-Bissau Namibia
3 3
North Korea Qatar
3 3
São Tomé & Príncipe Seychelles
3 1
United Arab Emirates Vanuatu
3 3
Vietnam Yemen
3 3
Algeria Antigua & Barbuda
3 6
China Congo - Brazzaville
1 3
Dominica German Democratic Republic
6 5
Guinea Jordan
3 3
Kuwait Libya
3 3
Malaysia Mali
3 3
Mauritania Monaco
3 4
Mongolia Morocco
1 3
Mozambique Oman
3 3
Senegal Somalia
3 3
St. Kitts & Nevis St. Lucia
6 6
St. Vincent & Grenadines Sudan
6 3
Suriname Tajikistan
3 4
Tanzania Tunisia
3 3
Uganda Yemen People's Republic
3 5
Zambia Zimbabwe
3 3
Kiribati Nauru
2 2
Taiwan Timor-Leste
5 4
Tonga Turkmenistan
2 4
Tuvalu Federal Republic of Germany
2 7
Andorra Belize
4 6
Croatia Czechia
4 4
Eritrea Estonia
4 4
Georgia Germany
4 4
Grenada Kazakhstan
6 4
Kyrgyzstan Latvia
4 4
Liechtenstein Lithuania
7 4
Moldova Montenegro
4 4
North Macedonia Slovakia
4 4
Slovenia South Korea
4 1
Switzerland South Sudan
4 2
Benin Burkina Faso
3 3
Guyana Jamaica
3 6
Maldives Nigeria
3 3
Singapore Sri Lanka
3 3
Yemen Arab Republic Bhutan
5 3
Equatorial Guinea Afghanistan
6 3
Cuba Indonesia
3 3
Iran Lebanon
3 3
Pakistan Saudi Arabia
3 3
Syria Turkey
3 1
Lesotho Malta
3 3
Trinidad & Tobago Czechoslovakia
3 5
San Marino Uzbekistan
4 4
Mauritius Palau
3 2
India Botswana
3 3
Costa Rica El Salvador
6 6
Peru Philippines
3 3
Thailand Burundi
3 3
Gabon Cyprus
3 3
Ethiopia Niger
3 3
Togo Barbados
3 6
Liberia Sierra Leone
6 3
Cambodia Eswatini
3 6
Congo - Kinshasa Gambia
6 3
Bahamas Iraq
6 3
Yugoslavia Papua New Guinea
3 6
Egypt Honduras
3 6
Kenya Colombia
3 3
Mexico Madagascar
3 3
Laos Fiji
3 6
Nepal Cameroon
3 6
Chad Central African Republic
3 6
Uruguay Solomon Islands
6 6
Côte d’Ivoire Ghana
6 3
Haiti Samoa
6 6
Albania Ecuador
1 3
Dominican Republic Rwanda
6 3
Myanmar (Burma) Guatemala
3 6
Panama Bolivia
6 3
South Africa Chile
7 6
Nicaragua Venezuela
3 3
Belarus Finland
1 7
Brazil Ukraine
3 1
Argentina Bulgaria
3 1
Romania Russia
1 1
Paraguay Malawi
6 7
Hungary Marshall Islands
1 2
Austria Poland
7 1
Sweden Ireland
7 7
Iceland Norway
7 7
New Zealand Greece
7 7
Denmark Micronesia (Federated States of)
7 2
Spain Australia
7 7
Japan Canada
7 7
Netherlands Portugal
7 7
Luxembourg Belgium
7 7
Italy United Kingdom
7 7
France Israel
7 6
United States Zanzibar
6 3
table(km.res7$cluster)
1 2 3 4 5 6 7
16 8 91 23 5 33 24
aggregate(svd_dimen15_wide, by = list(cluster = km.res7$cluster), mean)svd_km7<- cbind(svd_dimen15_wide, cluster = km.res7$cluster)
svd_km7fviz_cluster(km.res7,
data = svd_dimen15_wide,
palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07",
"#ff5733", "#33ff42", "#f333ff"),
main = "cluster plots",
ggtheme = theme_clean()
)svd_dimen_all <- by_country_word %>%
widely_svd(country, word, pct_yes)svd_dimen_allsvd_dimen_all_wide <- pivot_wider(data = svd_dimen_all, id_cols = country, names_from = dimension, values_from = value)
svd_dimen_all_widesvd_dimen_all_wide <- as.data.frame(svd_dimen_all_wide)rownames(svd_dimen_all_wide) <- svd_dimen_all_wide$countrysvd_dimen_all_widesvd_dimen_all_wide <- svd_dimen_all_wide[,2:30]
svd_dimen_all_widewss <- factoextra::fviz_nbclust(scale(svd_dimen_all_wide), kmeans, method = "wss")
wsssilhouette <- factoextra::fviz_nbclust(scale(svd_dimen_all_wide), kmeans, method = "silhouette")
silhouettegap_stat <- factoextra::fviz_nbclust(scale(svd_dimen_all_wide), kmeans, method = "gap_stat")Clustering k = 1,2,..., K.max (= 10): .. done
Bootstrapping, b = 1,2,..., B (= 100) [one "." per sample]:
.................................................. 50
.................................................. 100
gap_statset.seed(123)
km.res_all_2 <- kmeans(svd_dimen_all_wide, 2, nstart = 25)
km.res_all_2K-means clustering with 2 clusters of sizes 6, 194
Cluster means:
1 2 3 4 5 6
1 -0.06139325 0.02107797 -0.031731997 -0.071108429 0.061614167 -0.068858189
2 -0.06929547 -0.01041582 0.002436869 0.006134359 0.005067146 0.005038479
7 8 9 10 11 12
1 0.206963898 -0.292065376 0.0282339387 -0.032867020 0.0221243746 0.04799433
2 -0.003605791 0.007194646 -0.0004681654 0.003077048 0.0004897412 -0.00199332
13 14 15 16 17
1 -0.016700867 -0.0330012270 0.047456482 0.080158569 -0.0005939739
2 0.002199812 -0.0002963578 -0.001895586 -0.002759852 -0.0014781935
18 19 20 21 22
1 0.0023536140 -0.0126201482 0.003607988 -0.0073441991 -0.0003078067
2 0.0004010253 0.0003282263 -0.001035063 0.0004940144 0.0008492005
23 24 25 26 27
1 -0.00608619875 -0.0051930215 -0.025993852 -0.0244021009 0.01668698542
2 0.00005197066 0.0003507106 -0.000191068 0.0007040355 -0.00007176241
28 29
1 -0.01400574 0.023086375
2 -0.00048371 -0.001122705
Clustering vector:
Angola Armenia
2 2
Azerbaijan Bahrain
2 2
Bangladesh Bosnia & Herzegovina
2 2
Brunei Cape Verde
2 2
Comoros Djibouti
2 2
Guinea-Bissau Namibia
2 2
North Korea Qatar
2 2
São Tomé & Príncipe Seychelles
2 2
United Arab Emirates Vanuatu
2 2
Vietnam Yemen
2 2
Algeria Antigua & Barbuda
2 2
China Congo - Brazzaville
2 2
Dominica German Democratic Republic
2 1
Guinea Jordan
2 2
Kuwait Libya
2 2
Malaysia Mali
2 2
Mauritania Monaco
2 2
Mongolia Morocco
2 2
Mozambique Oman
2 2
Senegal Somalia
2 2
St. Kitts & Nevis St. Lucia
2 2
St. Vincent & Grenadines Sudan
2 2
Suriname Tajikistan
2 2
Tanzania Tunisia
2 2
Uganda Yemen People's Republic
2 1
Zambia Zimbabwe
2 2
Kiribati Nauru
2 2
Taiwan Timor-Leste
1 2
Tonga Turkmenistan
2 2
Tuvalu Federal Republic of Germany
2 1
Andorra Belize
2 2
Croatia Czechia
2 2
Eritrea Estonia
2 2
Georgia Germany
2 2
Grenada Kazakhstan
2 2
Kyrgyzstan Latvia
2 2
Liechtenstein Lithuania
2 2
Moldova Montenegro
2 2
North Macedonia Slovakia
2 2
Slovenia South Korea
2 2
Switzerland South Sudan
2 2
Benin Burkina Faso
2 2
Guyana Jamaica
2 2
Maldives Nigeria
2 2
Singapore Sri Lanka
2 2
Yemen Arab Republic Bhutan
1 2
Equatorial Guinea Afghanistan
2 2
Cuba Indonesia
2 2
Iran Lebanon
2 2
Pakistan Saudi Arabia
2 2
Syria Turkey
2 2
Lesotho Malta
2 2
Trinidad & Tobago Czechoslovakia
2 1
San Marino Uzbekistan
2 2
Mauritius Palau
2 2
India Botswana
2 2
Costa Rica El Salvador
2 2
Peru Philippines
2 2
Thailand Burundi
2 2
Gabon Cyprus
2 2
Ethiopia Niger
2 2
Togo Barbados
2 2
Liberia Sierra Leone
2 2
Cambodia Eswatini
2 2
Congo - Kinshasa Gambia
2 2
Bahamas Iraq
2 2
Yugoslavia Papua New Guinea
2 2
Egypt Honduras
2 2
Kenya Colombia
2 2
Mexico Madagascar
2 2
Laos Fiji
2 2
Nepal Cameroon
2 2
Chad Central African Republic
2 2
Uruguay Solomon Islands
2 2
Côte d’Ivoire Ghana
2 2
Haiti Samoa
2 2
Albania Ecuador
2 2
Dominican Republic Rwanda
2 2
Myanmar (Burma) Guatemala
2 2
Panama Bolivia
2 2
South Africa Chile
2 2
Nicaragua Venezuela
2 2
Belarus Finland
2 2
Brazil Ukraine
2 2
Argentina Bulgaria
2 2
Romania Russia
2 2
Paraguay Malawi
2 2
Hungary Marshall Islands
2 2
Austria Poland
2 2
Sweden Ireland
2 2
Iceland Norway
2 2
New Zealand Greece
2 2
Denmark Micronesia (Federated States of)
2 2
Spain Australia
2 2
Japan Canada
2 2
Netherlands Portugal
2 2
Luxembourg Belgium
2 2
Italy United Kingdom
2 2
France Israel
2 2
United States Zanzibar
2 2
Within cluster sum of squares by cluster:
[1] 2.538819 24.485084
(between_SS / total_SS = 3.5 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
km.res_all_2$cluster Angola Armenia
2 2
Azerbaijan Bahrain
2 2
Bangladesh Bosnia & Herzegovina
2 2
Brunei Cape Verde
2 2
Comoros Djibouti
2 2
Guinea-Bissau Namibia
2 2
North Korea Qatar
2 2
São Tomé & Príncipe Seychelles
2 2
United Arab Emirates Vanuatu
2 2
Vietnam Yemen
2 2
Algeria Antigua & Barbuda
2 2
China Congo - Brazzaville
2 2
Dominica German Democratic Republic
2 1
Guinea Jordan
2 2
Kuwait Libya
2 2
Malaysia Mali
2 2
Mauritania Monaco
2 2
Mongolia Morocco
2 2
Mozambique Oman
2 2
Senegal Somalia
2 2
St. Kitts & Nevis St. Lucia
2 2
St. Vincent & Grenadines Sudan
2 2
Suriname Tajikistan
2 2
Tanzania Tunisia
2 2
Uganda Yemen People's Republic
2 1
Zambia Zimbabwe
2 2
Kiribati Nauru
2 2
Taiwan Timor-Leste
1 2
Tonga Turkmenistan
2 2
Tuvalu Federal Republic of Germany
2 1
Andorra Belize
2 2
Croatia Czechia
2 2
Eritrea Estonia
2 2
Georgia Germany
2 2
Grenada Kazakhstan
2 2
Kyrgyzstan Latvia
2 2
Liechtenstein Lithuania
2 2
Moldova Montenegro
2 2
North Macedonia Slovakia
2 2
Slovenia South Korea
2 2
Switzerland South Sudan
2 2
Benin Burkina Faso
2 2
Guyana Jamaica
2 2
Maldives Nigeria
2 2
Singapore Sri Lanka
2 2
Yemen Arab Republic Bhutan
1 2
Equatorial Guinea Afghanistan
2 2
Cuba Indonesia
2 2
Iran Lebanon
2 2
Pakistan Saudi Arabia
2 2
Syria Turkey
2 2
Lesotho Malta
2 2
Trinidad & Tobago Czechoslovakia
2 1
San Marino Uzbekistan
2 2
Mauritius Palau
2 2
India Botswana
2 2
Costa Rica El Salvador
2 2
Peru Philippines
2 2
Thailand Burundi
2 2
Gabon Cyprus
2 2
Ethiopia Niger
2 2
Togo Barbados
2 2
Liberia Sierra Leone
2 2
Cambodia Eswatini
2 2
Congo - Kinshasa Gambia
2 2
Bahamas Iraq
2 2
Yugoslavia Papua New Guinea
2 2
Egypt Honduras
2 2
Kenya Colombia
2 2
Mexico Madagascar
2 2
Laos Fiji
2 2
Nepal Cameroon
2 2
Chad Central African Republic
2 2
Uruguay Solomon Islands
2 2
Côte d’Ivoire Ghana
2 2
Haiti Samoa
2 2
Albania Ecuador
2 2
Dominican Republic Rwanda
2 2
Myanmar (Burma) Guatemala
2 2
Panama Bolivia
2 2
South Africa Chile
2 2
Nicaragua Venezuela
2 2
Belarus Finland
2 2
Brazil Ukraine
2 2
Argentina Bulgaria
2 2
Romania Russia
2 2
Paraguay Malawi
2 2
Hungary Marshall Islands
2 2
Austria Poland
2 2
Sweden Ireland
2 2
Iceland Norway
2 2
New Zealand Greece
2 2
Denmark Micronesia (Federated States of)
2 2
Spain Australia
2 2
Japan Canada
2 2
Netherlands Portugal
2 2
Luxembourg Belgium
2 2
Italy United Kingdom
2 2
France Israel
2 2
United States Zanzibar
2 2
table(km.res_all_2$cluster)
1 2
6 194
aggregate(svd_dimen_all_wide, by = list(cluster = km.res_all_2$cluster), mean)svd_km_all_2<- cbind(svd_dimen_all_wide, cluster = km.res_all_2$cluster)
svd_km_all_2fviz_cluster(km.res_all_2,
data = svd_dimen_all_wide,
palette = c("#00AFBB","#E7B800"), # "#2E9FDF", "#FC4E07", "#ff5733"),
main = "cluster plots",
ggtheme = theme_clean()
)set.seed(123)
km.res_all_6 <- kmeans(svd_dimen_all_wide, 6, nstart = 25)
km.res_all_6K-means clustering with 6 clusters of sizes 17, 5, 89, 19, 58, 12
Cluster means:
1 2 3 4 5 6
1 -0.06153182 0.010079120 0.018041323 -0.079446958 0.14466348 0.036944903
2 -0.06787841 0.041670161 -0.029028086 -0.095107121 0.07354950 -0.116434435
3 -0.07905462 0.028473118 -0.017447432 -0.004959457 -0.02648906 -0.008897402
4 -0.05322435 -0.170362010 -0.015443428 0.007344535 0.03183601 -0.080546873
5 -0.06215663 -0.009795446 -0.002399368 0.068518412 0.01886808 0.046775307
6 -0.06450331 -0.083582549 0.175517360 -0.090222779 -0.06800182 0.010643009
7 8 9 10 11 12
1 -0.03717477 0.02271060 -0.003247939 0.049089561 0.039264628 -0.034010623
2 0.22507791 -0.29865611 0.026532299 -0.021728232 0.002297519 0.028178317
3 -0.01098023 0.01429689 0.014654362 -0.008679015 -0.004960538 -0.003531060
4 -0.01740719 0.01869446 -0.034700987 -0.045696574 -0.007593595 -0.002818792
5 0.01977140 -0.01251500 -0.016680765 0.007411617 0.012981166 0.010828505
6 0.01750648 -0.01259821 0.026974825 0.073721439 -0.051530973 0.006526593
13 14 15 16 17 18
1 0.01381594 -0.022786086 0.0070141562 -0.004193696 0.010574437 0.009796145
2 -0.01010099 -0.031485244 -0.0613174476 0.077812941 0.043116153 -0.002420708
3 0.02248038 0.004676469 0.0023545846 0.012838770 -0.017097403 -0.007555227
4 -0.00608708 0.011276797 -0.0002718366 0.017044448 0.009588254 -0.035007813
5 -0.02608786 -0.004860531 0.0001799057 -0.026832707 0.016103113 0.013600405
6 -0.01915094 -0.004938769 -0.0092071578 -0.023535822 -0.023347342 0.040519156
19 20 21 22 23
1 0.0006201514 -0.0284128427 -0.038348540 -0.022580277 -0.036727851
2 0.0002212013 0.0554714318 -0.038533704 -0.024724888 0.003942613
3 -0.0014538048 -0.0008028494 -0.007248286 0.003339062 -0.004933491
4 0.0154494501 -0.0029790538 -0.008443280 0.014073004 -0.013112443
5 -0.0023017511 0.0047351923 0.020801411 0.004037867 0.017878113
6 -0.0045285773 -0.0100065588 0.041283771 -0.010697721 0.019126004
24 25 26 27 28
1 0.005493446 -0.005117578 0.002417232 0.0029094274 0.007428524
2 -0.016486206 0.001868707 -0.018987153 0.0160369058 0.009720300
3 0.003452780 0.004296271 0.002951245 0.0001131737 -0.007050715
4 -0.002556673 0.006071483 -0.015038645 -0.0211500743 0.004213714
5 -0.007911431 -0.005420122 0.001394422 0.0072501776 0.005088708
6 0.018838713 -0.024894516 -0.001149155 -0.0060146780 -0.008371050
29
1 0.01521460663
2 0.00916700902
3 -0.00434151837
4 0.01636142760
5 0.00008501225
6 -0.02609771673
Clustering vector:
Angola Armenia
3 3
Azerbaijan Bahrain
1 3
Bangladesh Bosnia & Herzegovina
3 4
Brunei Cape Verde
3 3
Comoros Djibouti
3 3
Guinea-Bissau Namibia
3 3
North Korea Qatar
3 3
São Tomé & Príncipe Seychelles
3 1
United Arab Emirates Vanuatu
3 5
Vietnam Yemen
3 3
Algeria Antigua & Barbuda
3 5
China Congo - Brazzaville
1 3
Dominica German Democratic Republic
1 2
Guinea Jordan
3 3
Kuwait Libya
3 3
Malaysia Mali
3 3
Mauritania Monaco
3 4
Mongolia Morocco
1 3
Mozambique Oman
3 3
Senegal Somalia
3 3
St. Kitts & Nevis St. Lucia
5 5
St. Vincent & Grenadines Sudan
5 3
Suriname Tajikistan
3 6
Tanzania Tunisia
3 3
Uganda Yemen People's Republic
3 2
Zambia Zimbabwe
3 3
Kiribati Nauru
6 6
Taiwan Timor-Leste
2 6
Tonga Turkmenistan
4 6
Tuvalu Federal Republic of Germany
6 5
Andorra Belize
4 5
Croatia Czechia
4 4
Eritrea Estonia
6 4
Georgia Germany
4 4
Grenada Kazakhstan
5 6
Kyrgyzstan Latvia
6 4
Liechtenstein Lithuania
4 4
Moldova Montenegro
4 4
North Macedonia Slovakia
4 4
Slovenia South Korea
4 1
Switzerland South Sudan
4 6
Benin Burkina Faso
3 3
Guyana Jamaica
3 5
Maldives Nigeria
3 3
Singapore Sri Lanka
3 3
Yemen Arab Republic Bhutan
2 3
Equatorial Guinea Afghanistan
5 3
Cuba Indonesia
3 3
Iran Lebanon
3 3
Pakistan Saudi Arabia
3 3
Syria Turkey
3 5
Lesotho Malta
3 5
Trinidad & Tobago Czechoslovakia
3 2
San Marino Uzbekistan
4 6
Mauritius Palau
3 1
India Botswana
3 3
Costa Rica El Salvador
3 5
Peru Philippines
3 3
Thailand Burundi
3 3
Gabon Cyprus
3 5
Ethiopia Niger
3 3
Togo Barbados
3 5
Liberia Sierra Leone
5 3
Cambodia Eswatini
3 5
Congo - Kinshasa Gambia
5 3
Bahamas Iraq
5 3
Yugoslavia Papua New Guinea
3 5
Egypt Honduras
3 5
Kenya Colombia
3 3
Mexico Madagascar
3 3
Laos Fiji
3 5
Nepal Cameroon
3 5
Chad Central African Republic
3 5
Uruguay Solomon Islands
5 5
Côte d’Ivoire Ghana
5 3
Haiti Samoa
5 5
Albania Ecuador
1 3
Dominican Republic Rwanda
5 5
Myanmar (Burma) Guatemala
3 5
Panama Bolivia
5 3
South Africa Chile
6 5
Nicaragua Venezuela
3 3
Belarus Finland
1 5
Brazil Ukraine
3 1
Argentina Bulgaria
3 1
Romania Russia
1 1
Paraguay Malawi
5 5
Hungary Marshall Islands
1 1
Austria Poland
5 1
Sweden Ireland
5 5
Iceland Norway
5 5
New Zealand Greece
5 5
Denmark Micronesia (Federated States of)
5 1
Spain Australia
5 5
Japan Canada
5 5
Netherlands Portugal
5 5
Luxembourg Belgium
5 5
Italy United Kingdom
5 5
France Israel
5 5
United States Zanzibar
5 3
Within cluster sum of squares by cluster:
[1] 2.710458 1.795699 4.722650 2.984536 7.515767 3.989531
(between_SS / total_SS = 15.3 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
# km.res_all_6$clustertable(km.res_all_6$cluster)
1 2 3 4 5 6
17 5 89 19 58 12
aggregate(svd_dimen_all_wide, by = list(cluster = km.res_all_6$cluster), mean)svd_km_all_6<- cbind(svd_dimen_all_wide, cluster = km.res_all_6$cluster)
svd_km_all_6fviz_cluster(km.res_all_6,
data = svd_dimen_all_wide,
palette = c("#00AFBB","#E7B800", "#2E9FDF", "#FC4E07", "#ff5733", "#f333ff"),
main = "cluster plots",
ggtheme = theme_clean()
)fviz_cluster(km.res_all_6,
data = svd_dimen_all_wide,
palette = c("#00AFBB","#E7B800", "#2E9FDF", "#FC4E07", "#ff5733", "#f333ff"),
main = "6 cluster plots",
ggtheme = theme_clean(),
ellipse.type = "euclid",
repel = TRUE
)svd6_final_df <- rownames_to_column(svd_km_all_6, "country")
svd6_final_dfsvd6_final_df <- svd6_final_df %>%
select(country, cluster, everything())
svd6_final_dfworld_data %>%
left_join((svd6_final_df %>%
mutate(country_code = countrycode(country, "country.name", "iso2c"))
),
by = c("country_code")) %>%
filter(!is.na(cluster)) %>%
ggplot(aes(x = long, y = lat, group = group,
fill = as.factor(cluster))) +
geom_polygon() +
theme_map() +
scale_fill_discrete() +
labs(fill = "cluster",
title = "World Clusters based on UN voting",
caption = "created by ViSa") +
theme(plot.title = element_text(face = "bold", size = 16))from: https://github.com/vincentarelbundock/countrycode
library(gt)Add country Flags to the table
Flags are not appearing in the table
# Keep only the countries that were assigned to the same cluster as India.
# The inner call looks up India's cluster id; the outer call filters on it.
ind_cluster <- filter(
  svd6_final_df,
  cluster == pull(filter(svd6_final_df, country == "India"), cluster)
)
ind_clusterind_cluster <- ind_cluster %>%
mutate(continent = countrycode(country, "country.name", "continent")) %>%
select(country, continent, cluster, everything())
ind_clusterind_cluster %>%
filter(country != "Zanzibar") %>%
ggplot(aes(x = `1`, y = `2`, col = continent)) +
geom_point() +
geom_text(aes(label = country)) +
labs(title = "India's cluster-3 with Dim1 & 2 on plot")by_country_word_yr_2000 <- unvotes %>%
inner_join(rc_words, by = "rcid") %>%
mutate(year = year(date)) %>%
filter(year > 1999) %>%
group_by(word, country, year) %>%
summarize_votes(min_votes = 0)
by_country_word_yr_2000cluster_data_2000 <- by_country_word_yr_2000 %>%
widely_svd(country, word, pct_yes)
cluster_data_2000cluster_data_2000_wide <- pivot_wider(data = cluster_data_2000, id_cols = country,
names_from = dimension, values_from = value)
cluster_data_2000_wideconverting to dataframe / matrix
cluster_data_2000_wide <- as.data.frame(cluster_data_2000_wide)
rownames(cluster_data_2000_wide) <- cluster_data_2000_wide$country
cluster_data_2000_widecluster_data_2000_wide <- cluster_data_2000_wide[, 2:29]
cluster_data_2000_widefactoextra::fviz_nbclust(scale(cluster_data_2000_wide), kmeans, method = "wss")factoextra::fviz_nbclust(scale(cluster_data_2000_wide), kmeans, method = "silhouette")factoextra::fviz_nbclust(scale(cluster_data_2000_wide), kmeans, method = "gap_stat")Clustering k = 1,2,..., K.max (= 10): .. done
Bootstrapping, b = 1,2,..., B (= 100) [one "." per sample]:
.................................................. 50
.................................................. 100
set.seed(123)
km.res4_2000 <- kmeans(cluster_data_2000_wide, 6, nstart = 25)table(km.res4_2000$cluster)
1 2 3 4 5 6
8 7 25 125 6 22
# Attach each country's k-means label (post-1999 model) to its SVD coordinates.
cluster_data_2000_wide_final <- cbind(
  cluster_data_2000_wide,
  cluster = km.res4_2000$cluster
)
cluster_data_2000_wide_final

# Plot the clusters on the feature matrix the model was fitted on.
# fviz_cluster() projects `data` onto principal components when it has more
# than two columns, so passing the labelled frame would feed the `cluster`
# column into the projection and overstate the separation between clusters.
fviz_cluster(km.res4_2000,
  data = cluster_data_2000_wide,
  palette = c("#00AFBB", "#E7B800", "#2E9FDF", "#FC4E07", "#ff5733", "#f333ff"),
  main = "cluster plots",
  ggtheme = theme_clean(),
  ellipse.type = "euclid",
  repel = TRUE
)

# Same plot with the default ellipses and without label repelling.
fviz_cluster(km.res4_2000,
  data = cluster_data_2000_wide,
  palette = c("#00AFBB", "#E7B800", "#2E9FDF", "#FC4E07", "#ff5733", "#f333ff"),
  main = "cluster plots",
  ggtheme = theme_clean()
)

# Move the country names out of the row names and put the identifying columns
# first so the frame can be joined to the map data.
cluster_data_2000_wide_final <- rownames_to_column(cluster_data_2000_wide_final, "country") %>%
  select(country, cluster, everything())
cluster_data_2000_wide_finalworld_data %>%
left_join((cluster_data_2000_wide_final %>%
mutate(country_code = countrycode(country, "country.name", "iso2c"))
),
by = "country_code") %>%
filter(!is.na(cluster)) %>%
ggplot(aes(x = long, y = lat, group = group, fill = as.factor(cluster))) +
geom_polygon() +
theme_map() +
scale_fill_discrete() +
labs(fill = "cluster",
title = "World Clusters based on UN voting year 2000 onwards",
caption = "created by ViSa") +
theme(plot.title = element_text(face = "bold", size = 16))by_country_word_yr_19xx <- unvotes %>%
inner_join(rc_words, by = "rcid") %>%
mutate(year = year(date)) %>%
filter(year <= 1999) %>%
group_by(word, country, year) %>%
summarize_votes(min_votes = 0)
by_country_word_yr_19xxcluster_data_19xx <- by_country_word_yr_19xx %>%
widely_svd(country, word, pct_yes)
cluster_data_19xxcluster_data_19xx_wide <- pivot_wider(data = cluster_data_19xx, id_cols = country,
names_from = dimension, values_from = value)
cluster_data_19xx_wideconverting to dataframe / matrix
cluster_data_19xx_wide <- as.data.frame(cluster_data_19xx_wide)
rownames(cluster_data_19xx_wide) <- cluster_data_19xx_wide$country
cluster_data_19xx_widecluster_data_19xx_wide <- cluster_data_19xx_wide[, 2:30]
cluster_data_19xx_wideset.seed(123)
km.res4_19xx <- kmeans(cluster_data_19xx_wide, 6, nstart = 25)table(km.res4_19xx$cluster)
1 2 3 4 5 6
75 16 17 5 28 51
# Attach each country's k-means label (pre-2000 model) to its SVD coordinates.
cluster_data_19xx_wide_final <- cbind(
  cluster_data_19xx_wide,
  cluster = km.res4_19xx$cluster
)
cluster_data_19xx_wide_final

# Plot the clusters on the feature matrix the model was fitted on.
# fviz_cluster() projects `data` onto principal components when it has more
# than two columns, so passing the labelled frame would feed the `cluster`
# column into the projection and overstate the separation between clusters.
fviz_cluster(km.res4_19xx,
  data = cluster_data_19xx_wide,
  palette = c("#00AFBB", "#E7B800", "#2E9FDF", "#FC4E07", "#ff5733", "#f333ff"),
  main = "cluster plots",
  ggtheme = theme_clean(),
  ellipse.type = "euclid",
  repel = TRUE
)

# Same plot with the default ellipses and without label repelling.
fviz_cluster(km.res4_19xx,
  data = cluster_data_19xx_wide,
  palette = c("#00AFBB", "#E7B800", "#2E9FDF", "#FC4E07", "#ff5733", "#f333ff"),
  main = "cluster plots",
  ggtheme = theme_clean()
)

# Move the country names out of the row names and put the identifying columns
# first so the frame can be joined to the map data.
cluster_data_19xx_wide_final <- rownames_to_column(cluster_data_19xx_wide_final, "country") %>%
  select(country, cluster, everything())
cluster_data_19xx_wide_finalworld_data %>%
left_join((cluster_data_19xx_wide_final %>%
mutate(country_code = countrycode(country, "country.name", "iso2c"))
),
by = "country_code") %>%
filter(!is.na(cluster)) %>%
ggplot(aes(x = long, y = lat, group = group, fill = as.factor(cluster))) +
geom_polygon() +
theme_map() +
scale_fill_discrete() +
labs(fill = "cluster",
title = "World 6 Clusters based on UN voting in 1900's",
caption = "created by ViSa") +
theme(plot.title = element_text(face = "bold", size = 16))final_clusters_df <- svd_dimen_all_wide %>%
rownames_to_column(var = "country")Error in as.data.frame.default(x[[i]], optional = TRUE, stringsAsFactors = stringsAsFactors) :
cannot coerce class ‘"kmeans"’ to a data.frame
Error in as.data.frame(cluster_res_list) :
object 'cluster_res_list' not found
irisiris_df <- iris %>%
as.data.frame()
rownames(iris_df) <- 1: nrow(iris_df)iris_dflapply(1:3,
function(cluster_num){
cluster_res_list <- as.list(kmeans(iris %>% select(-Species), cluster_num, nstart = 25))
names(cluster_res_list) <- paste("iris_clus", 1:length(cluster_res_list), sep="_")
list2env(cluster_res_list, envir = .GlobalEnv)
# print(head(cluster_res_list))
iris_df <- cbind(iris, paste0("iris_clus_", cluster_num))
} )[[1]]
[[2]]
[[3]]
NA
iris_dffor(cluster_num in 1:3){
km_cluster_res <- kmeans(iris %>% select(-Species), cluster_num, nstart = 25)
print(assign(paste("iris_clus", cluster_num,sep = "_"),km_cluster_res$cluster))
# iris_df <- cbind(iris,
# assign(paste("iris_clus", cluster_num,sep = "_"),
# km_cluster_res$cluster)
# )
} [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[115] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[39] 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[115] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[39] 2 2 2 2 2 2 2 2 2 2 2 2 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[77] 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 1 1 1 3 1 1 1 1 1 1 3
[115] 3 1 1 1 1 3 1 3 1 3 1 1 3 3 1 1 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 1 3
# iris_df %>% head()Error: unexpected '=' in:
" # tibble(iris_clus = .) %>%
tibble(paste0("iris_clus_", cluster_num) ="
Error: unexpected '=' in:
" cbind(iris) %>%
rename(paste0("iris_clus_", cluster_num) ="
cbind() cannot combine a data frame that has 0 rows with a data frame that has rows, so start from an empty list and build the result as shown below.
# Build a data frame holding only the k-means labels of iris for k = 1, 2, 3
# (one column per k, named iris_clus_<k>).
# The labels are collected in a list and converted once at the end, so nothing
# has to be column-bound onto an empty data frame.
# Seeded like the other k-means runs in this file so the labels are
# reproducible.
set.seed(123)
# Species is the known class label, so it is excluded from the features.
iris_features <- iris[, setdiff(names(iris), "Species"), drop = FALSE]
cluster_counts <- 1:3
cluster_labels <- lapply(cluster_counts, function(cluster_num) {
  kmeans(iris_features, centers = cluster_num, nstart = 25)$cluster
})
names(cluster_labels) <- paste0("iris_clus_", cluster_counts)
final_df <- as.data.frame(cluster_labels)
final_df %>% head()
In the code below, a data frame with a non-zero number of rows is combined with data frames that have the same number of rows.
# Seed the accumulator with the full iris data; every cluster column appended
# in the loop has the same number of rows.
final_df <- iris
for (cluster_num in seq_len(3)) {
  # k-means on iris without Species, using `cluster_num` centers.
  km_cluster_res <- iris %>%
    select(-Species) %>%
    kmeans(centers = cluster_num, nstart = 25)
  # One-column data frame of cluster ids, named iris_clus_<k>.
  iris_results <- as.data.frame(km_cluster_res$cluster) %>%
    setNames(paste0("iris_clus_", cluster_num))
  final_df <- as.data.frame(cbind(final_df, iris_results))
  # print(names(final_df))
}
final_df %>% head()

# Functional alternative to the loops above: build one cluster-label column
# per k (k = 1, 2, 3) with lapply(), then bind the columns side by side.
clusters <- Reduce(cbind, lapply(seq_len(3), function(cluster_num) {
  # fitted() returns the center row assigned to each observation; its row
  # names are the cluster ids, so the labels come out as character strings.
  cluster_labels <- iris %>%
    select(-Species) %>%
    kmeans(centers = cluster_num, nstart = 25) %>%
    fitted() %>%
    row.names()
  result <- tibble(iris_clus = cluster_labels)
  # Rename the single column to iris_clus_<k>.
  names(result) <- paste0("iris_clus_", cluster_num)
  result
}))
# Show iris alongside the three cluster-label columns.
cbind(iris, clusters)
# Append k-means cluster assignments for k = 1..15 to the SVD feature table.
# Each result is stored in a column named cluster_<k>.
final_clusters_df <- svd_dimen_all_wide
for (cluster_num in 1:15) {
  # Always cluster on the original features. Clustering on the growing
  # `final_clusters_df` would feed the cluster_<k> label columns from earlier
  # iterations back in as features and distort every later result.
  km_cluster_res <- kmeans(svd_dimen_all_wide, centers = cluster_num, nstart = 25)
  results <- as.data.frame(km_cluster_res$cluster)
  names(results) <- paste("cluster", cluster_num, sep = "_")
  final_clusters_df <- as.data.frame(cbind(final_clusters_df, results))
}
final_clusters_df %>% head()

# Stream graph of the number of votes per year, split by vote type.
unvotes %>%
  # mutate_all(as.factor) %>%
  group_by(years = year(date), vote) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  # mutate(pct = count/sum(count)) %>%
  ggplot(aes(x = years, y = count, fill = vote)) +
  ggstream::geom_stream(show.legend = FALSE) +
  # Label each stream directly, since the legend is hidden.
  geom_stream_label(aes(label = vote)) +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = wes_palette("Darjeeling2")) +
  theme(panel.grid.major = element_blank()) +
  labs(
    title = "World UN Voting trend over the years",
    y = "", x = "",
    caption = "created by ViSa"
  )

# Add a continent column derived from the ISO 2-letter country code.
unvotes_plus_continents <- unvotes %>%
  mutate(continent = countrycode(country_code, "iso2c", "continent"))

# Same stream graph as above, faceted by continent.
unvotes_plus_continents %>%
  group_by(continent, year = year(date), vote) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  # Drop summary rows containing NA (presumably codes with no continent match).
  na.omit() %>%
  # mutate(pct = count/sum(count)) %>%
  ggplot(aes(x = year, y = count, fill = vote)) +
  ggstream::geom_stream(show.legend = FALSE) +
  geom_stream_label(aes(label = vote)) +
  scale_y_continuous(
    labels = comma
    # , breaks = seq(-2000,2000, 500)
  ) +
  scale_fill_manual(values = wes_palette("Darjeeling2")) +
  theme(panel.grid.major = element_blank()) +
  facet_wrap(~continent) +
  labs(
    title = "World UN Voting trend over the years by continent",
    y = "", x = "",
    caption = "created by ViSa"
  )